from selenium import webdriver
from selenium.webdriver.common.by import By
import re
import time
import pandas as pd
# Scrape the first PAGE_COUNT result pages of an Eastmoney news title search
# and keep their concatenated HTML in `code` for later parsing.
PAGE_COUNT = 2
# Seconds to pause before reading each page's HTML.
PAGE_LOAD_DELAY = 3
# Pagination link clicked to move on to the following result page
# (presumably the "next page" button; verify if the site layout changes).
NEXT_PAGE_XPATH = '//*[@id="app"]/div[3]/div[1]/div[4]/div/a[5]'

url = 'https://so.eastmoney.com/news/s?keyword=天猫&type=title'
browser = webdriver.Chrome()
pages = []
try:
    browser.get(url)
    for page_number in range(PAGE_COUNT):
        # Fixed pause before grabbing the HTML so the page has time to render.
        time.sleep(PAGE_LOAD_DELAY)
        pages.append(browser.page_source)
        # Only advance when another page still has to be captured; clicking
        # after the last capture would be wasted work and could raise.
        if page_number < PAGE_COUNT - 1:
            browser.find_element(By.XPATH, NEXT_PAGE_XPATH).click()
finally:
    # Always shut the browser down, even if navigation or the click fails,
    # so no driver/browser process is left running.
    browser.quit()
code = ''.join(pages)
# Extract headline/date pairs from the scraped HTML held in `code` and save
# them to an Excel workbook.
p_title = r'<div class="news_item_t" .*?><a href=".*?" target="_blank">(.*?)</a>'
p_date = r'<span class="news_item_time">(.*?)</span>'
# re.S lets `.` match newlines, so a match may span several lines of HTML.
title = re.findall(p_title, code, re.S)
date = re.findall(p_date, code, re.S)

# The two lists are paired by position; if the patterns matched a different
# number of items the pairing is unreliable, so fail with a clear message
# instead of an IndexError or an opaque pandas length error.
if len(title) != len(date):
    raise ValueError(
        f'Extracted {len(title)} titles but {len(date)} dates; '
        'the page markup may have changed.'
    )

# Drop any HTML tags left inside the captured headline text.
title = [re.sub('<.*?>', '', raw_title) for raw_title in title]
# Keep only the text before the first space (presumably the date part of a
# "date time" stamp; verify against the page markup).
date = [raw_date.split(' ')[0] for raw_date in date]

data = pd.DataFrame({'标题': title, '日期': date})
data.to_excel('企业资讯.xlsx', index=False)
